{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1ae632a",
   "metadata": {},
   "source": [
    "# Pydantic parser\n",
    "This output parser allows users to specify an arbitrary Pydantic Model and query LLMs for outputs that conform to that schema.\n",
    "\n",
    "Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed JSON. In the OpenAI family, DaVinci can do reliably but Curie's ability already drops off dramatically. \n",
    "\n",
    "Use Pydantic to declare your data model. Pydantic's BaseModel is like a Python dataclass, but with actual type checking + coercion."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cba6d8e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.output_parsers import PydanticOutputParser\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain_core.pydantic_v1 import BaseModel, Field, validator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0a203100",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = ChatOpenAI(temperature=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b3f16168",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Joke(setup=\"Why don't scientists trust atoms?\", punchline='Because they make up everything!')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Define your desired data structure.\n",
    "class Joke(BaseModel):\n",
    "    setup: str = Field(description=\"question to set up a joke\")\n",
    "    punchline: str = Field(description=\"answer to resolve the joke\")\n",
    "\n",
    "    # You can add custom validation logic easily with Pydantic.\n",
    "    @validator(\"setup\")\n",
    "    def question_ends_with_question_mark(cls, field):\n",
    "        if field[-1] != \"?\":\n",
    "            raise ValueError(\"Badly formed question!\")\n",
    "        return field\n",
    "\n",
    "\n",
    "# And a query intented to prompt a language model to populate the data structure.\n",
    "joke_query = \"Tell me a joke.\"\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = PydanticOutputParser(pydantic_object=Joke)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "chain = prompt | model | parser\n",
    "\n",
    "chain.invoke({\"query\": joke_query})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "03049f88",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Cast Away', 'Saving Private Ryan', 'Toy Story', 'The Green Mile'])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Here's another example, but with a compound typed field.\n",
    "class Actor(BaseModel):\n",
    "    name: str = Field(description=\"name of an actor\")\n",
    "    film_names: List[str] = Field(description=\"list of names of films they starred in\")\n",
    "\n",
    "\n",
    "actor_query = \"Generate the filmography for a random actor.\"\n",
    "\n",
    "parser = PydanticOutputParser(pydantic_object=Actor)\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "chain = prompt | model | parser\n",
    "\n",
    "chain.invoke({\"query\": actor_query})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b11e014",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
